import pandas as pd
pd.options.display.float_format = '{:,.4f}'.format
import numpy as np
from sklearn.metrics import r2_score, mean_absolute_error,mean_absolute_percentage_error
from scipy import optimize
import plotly.graph_objects as go
import warnings
warnings.filterwarnings('ignore')
# Average LTV per player, calculated as the cohort's total revenue divided by its total installs.
# LTV measured at the milestone days 1, 3, 7, 14, 30, 60 and 90.
d1_ltv, d3_ltv, d7_ltv, d14_ltv = 0.082, 0.098, 0.117, 0.137
d30_ltv, d60_ltv, d90_ltv = 0.17, 0.19, 0.21

# Fill in one value per day by interpolating linearly between consecutive
# milestones.  Every segment after the first drops its leading point, since
# that day is already the closing point of the segment before it.
one_three = list(np.linspace(d1_ltv, d3_ltv, num=3))
three_seven = list(np.linspace(d3_ltv, d7_ltv, num=5)[1:])
seven_fourteen = list(np.linspace(d7_ltv, d14_ltv, num=8)[1:])
fourteen_thirty = list(np.linspace(d14_ltv, d30_ltv, num=17)[1:])
thirty_sixty = list(np.linspace(d30_ltv, d60_ltv, num=31)[1:])
sixty_ninety = list(np.linspace(d60_ltv, d90_ltv, num=31)[1:])

# Days 1-90 as one flat list, each value rounded to four decimals.
ltvs = [
    round(value, 4)
    for segment in (
        one_three,
        three_seven,
        seven_fourteen,
        fourteen_thirty,
        thirty_sixty,
        sixty_ninety,
    )
    for value in segment
]
# Daily LTV table: one row per day for days 1-90, LTV given to four decimals.
df = pd.DataFrame(
    {
        'Day': list(range(1, 91)),
        'LTV': [
            # days 1-30
            0.082, 0.09, 0.098, 0.1028, 0.1075, 0.1122,
            0.117, 0.1199, 0.1227, 0.1256, 0.1284, 0.1313,
            0.1341, 0.137, 0.1391, 0.1411, 0.1432, 0.1453,
            0.1473, 0.1494, 0.1514, 0.1535, 0.1556, 0.1576,
            0.1597, 0.1618, 0.1638, 0.1659, 0.1679, 0.17,
            # days 31-60
            0.1707, 0.1713, 0.172, 0.1727, 0.1733, 0.174,
            0.1747, 0.1753, 0.176, 0.1767, 0.1773, 0.178,
            0.1787, 0.1793, 0.18, 0.1807, 0.1813, 0.182,
            0.1827, 0.1833, 0.184, 0.1847, 0.1853, 0.186,
            0.1867, 0.1873, 0.188, 0.1887, 0.1893, 0.19,
            # days 61-90
            0.1907, 0.1913, 0.192, 0.1927, 0.1933, 0.194,
            0.1947, 0.1953, 0.196, 0.1967, 0.1973, 0.198,
            0.1987, 0.1993, 0.2, 0.2007, 0.2013, 0.202,
            0.2027, 0.2033, 0.204, 0.2047, 0.2053, 0.206,
            0.2067, 0.2073, 0.208, 0.2087, 0.2093, 0.21,
        ],
    }
)
df
| Day | LTV | |
|---|---|---|
| 0 | 1 | 0.0820 |
| 1 | 2 | 0.0900 |
| 2 | 3 | 0.0980 |
| 3 | 4 | 0.1028 |
| 4 | 5 | 0.1075 |
| ... | ... | ... |
| 85 | 86 | 0.2073 |
| 86 | 87 | 0.2080 |
| 87 | 88 | 0.2087 |
| 88 | 89 | 0.2093 |
| 89 | 90 | 0.2100 |
90 rows × 2 columns
# Scatter plot of the observed daily LTV values on a transparent background.
fig = go.Figure()
fig.add_trace(
    go.Scatter(x=df.Day, y=df.LTV, mode='markers', name='Raw Data')
)
# Kept as the final expression so a notebook cell still renders the figure.
fig.update_layout(
    plot_bgcolor='rgba(0, 0, 0, 0)',
    paper_bgcolor='rgba(0, 0, 0, 0)',
    # Bold, horizontally centred title; the bold tag is properly closed.
    title=dict(text='<b>LTV curves</b>', x=0.5, y=0.95),
)
def linear_function(x, a, b):
    """Evaluate the straight line ``a * x + b``.

    Args:
        x: A scalar, or a collection of x values.  Lists, tuples and ranges
            are converted to a float NumPy array first; any other type
            (scalar, ndarray, pandas Series, ...) is used as given.
        a: Slope.
        b: Intercept.

    Returns:
        ``a * x + b``.  An array for list/tuple/range input; otherwise
        whatever the arithmetic on ``x`` produces.
    """
    # A built-in sequence cannot be multiplied by a plain Python float, so
    # coerce it up front instead of relying on ``a`` being a NumPy scalar.
    if isinstance(x, (list, tuple, range)):
        x = np.asarray(x, dtype=float)
    return a * x + b
def logarithmic_function(x, a, b, c):
    """Evaluate the logarithmic curve ``a * ln(b * x) + c``.

    Args:
        x: A scalar, or a collection of x values.  Lists, tuples and ranges
            are converted to a float NumPy array first; any other type is
            used as given.
        a: Scale applied to the logarithm.
        b: Factor applied to ``x`` inside the logarithm.
        c: Constant offset.

    Returns:
        ``a * np.log(b * x) + c``.  ``b * x`` has to be positive for the
        result to be finite; ``np.log`` gives NaN or -inf otherwise.
    """
    # A built-in sequence cannot be multiplied by a plain Python float, so
    # coerce it up front instead of relying on ``b`` being a NumPy scalar.
    if isinstance(x, (list, tuple, range)):
        x = np.asarray(x, dtype=float)
    return a * np.log(b * x) + c
def exponential_function(x, a, k, b):
    """Evaluate the exponential curve ``a * exp(k * x) + b``.

    NOTE(review): a later cell in this file redefines this function with the
    exponent negated (``exp(-k * x)``), so parameters fitted with one
    definition must not be fed to the other.

    Args:
        x: A scalar, or a collection of x values.  Lists, tuples and ranges
            are converted to a float NumPy array first; any other type is
            used as given.
        a: Amplitude of the exponential term.
        k: Rate constant multiplying ``x`` in the exponent.
        b: Constant offset.

    Returns:
        ``a * np.exp(x * k) + b``.
    """
    # A built-in sequence cannot be multiplied by a plain Python float, so
    # coerce it up front instead of relying on ``k`` being a NumPy scalar.
    if isinstance(x, (list, tuple, range)):
        x = np.asarray(x, dtype=float)
    return a * np.exp(x * k) + b
# Use scipy's curve_fit to fit the model to the x- and y-axis data.
# curve_fit returns:
# popt_linear: the fitted parameters
# pcov_linear: the estimated covariance of the fitted parameters
def linear_function(x, a, b):
    """Evaluate the straight line ``a * x + b``.

    Args:
        x: A scalar, or a collection of x values.  Lists, tuples and ranges
            are converted to a float NumPy array first; any other type
            (scalar, ndarray, pandas Series, ...) is used as given.
        a: Slope.
        b: Intercept.

    Returns:
        ``a * x + b``.  An array for list/tuple/range input; otherwise
        whatever the arithmetic on ``x`` produces.
    """
    # A built-in sequence cannot be multiplied by a plain Python float, so
    # coerce it up front instead of relying on ``a`` being a NumPy scalar.
    if isinstance(x, (list, tuple, range)):
        x = np.asarray(x, dtype=float)
    return a * x + b
# Fit the straight line to the daily LTV series, keeping both the fitted
# parameters and their covariance estimate, then display the pair.
popt_linear, pcov_linear = optimize.curve_fit(
    f=linear_function,
    xdata=df.Day,
    ydata=df.LTV,
)
popt_linear, pcov_linear
(array([0.00110147, 0.12211755]),
array([[ 1.89963722e-09, -8.64334941e-08],
[-8.64334941e-08, 5.21482085e-06]]))
# Evaluate the fitted line at day 1002, far outside the 90 observed days.
linear_function(1002, *popt_linear)
1.2257906315010743
# Fitted linear values for the observed window, days 1-90.
linear_function(range(1,91), *popt_linear)
array([0.12321902, 0.12432049, 0.12542196, 0.12652343, 0.1276249 ,
0.12872637, 0.12982784, 0.13092931, 0.13203078, 0.13313225,
0.13423372, 0.13533519, 0.13643666, 0.13753813, 0.13863961,
0.13974108, 0.14084255, 0.14194402, 0.14304549, 0.14414696,
0.14524843, 0.1463499 , 0.14745137, 0.14855284, 0.14965431,
0.15075578, 0.15185725, 0.15295872, 0.15406019, 0.15516166,
0.15626313, 0.1573646 , 0.15846607, 0.15956754, 0.16066901,
0.16177048, 0.16287195, 0.16397342, 0.16507489, 0.16617636,
0.16727783, 0.1683793 , 0.16948077, 0.17058224, 0.17168371,
0.17278518, 0.17388665, 0.17498812, 0.17608959, 0.17719106,
0.17829253, 0.179394 , 0.18049547, 0.18159694, 0.18269841,
0.18379988, 0.18490135, 0.18600282, 0.18710429, 0.18820576,
0.18930723, 0.1904087 , 0.19151017, 0.19261164, 0.19371311,
0.19481458, 0.19591605, 0.19701752, 0.19811899, 0.19922046,
0.20032193, 0.2014234 , 0.20252487, 0.20362634, 0.20472781,
0.20582928, 0.20693075, 0.20803222, 0.20913369, 0.21023516,
0.21133663, 0.2124381 , 0.21353957, 0.21464104, 0.21574251,
0.21684398, 0.21794546, 0.21904693, 0.2201484 , 0.22124987])
# In-sample accuracy of the linear fit over the observed days 1-90.
# The predictions are evaluated once, and the built-in round() replaces the
# ``.round()`` method so the cell works whether scikit-learn hands back NumPy
# scalars or plain Python floats.
linear_pred = linear_function(np.arange(1, 91), *popt_linear)
print(
    f"R2:{round(r2_score(df.LTV, linear_pred), 2)}\n"
    f"MAE:{round(mean_absolute_error(df.LTV, linear_pred), 3)}\n"
    f"MAPE:{round(mean_absolute_percentage_error(df.LTV, linear_pred) * 100, 2)}%"
)
R2:0.88 MAE:0.008 MAPE:5.61%
# Observed LTV plotted against the linear fit extrapolated to day 360.
forecast_days = np.arange(1, 361)  # shared by the x axis and the model

fig = go.Figure()
fig.add_trace(
    go.Scatter(x=df.Day, y=df.LTV, mode='markers', name='Raw Data')
)
fig.add_trace(
    go.Scatter(
        x=forecast_days,
        y=linear_function(forecast_days, *popt_linear),
        mode='lines',
        name='Linear Curve',
    )
)
# Kept as the final expression so a notebook cell still renders the figure.
fig.update_layout(
    plot_bgcolor='rgba(0, 0, 0, 0)',
    paper_bgcolor='rgba(0, 0, 0, 0)',
    # Bold, horizontally centred title; the bold tag is properly closed.
    title=dict(text='<b>LTV curves</b>', x=0.5, y=0.95),
)
# Use scipy's curve_fit to fit the model to the x- and y-axis data.
# curve_fit returns:
# popt_log: the fitted parameters
# pcov_log: the estimated covariance of the fitted parameters
def logarithmic_function(x, a, b, c):
    """Evaluate the logarithmic curve ``a * ln(b * x) + c``.

    Args:
        x: A scalar, or a collection of x values.  Lists, tuples and ranges
            are converted to a float NumPy array first; any other type is
            used as given.
        a: Scale applied to the logarithm.
        b: Factor applied to ``x`` inside the logarithm.
        c: Constant offset.

    Returns:
        ``a * np.log(b * x) + c``.  ``b * x`` has to be positive for the
        result to be finite; ``np.log`` gives NaN or -inf otherwise.
    """
    # A built-in sequence cannot be multiplied by a plain Python float, so
    # coerce it up front instead of relying on ``b`` being a NumPy scalar.
    if isinstance(x, (list, tuple, range)):
        x = np.asarray(x, dtype=float)
    return a * np.log(b * x) + c
# Fit the logarithmic curve to the daily LTV series and display the fitted
# parameters; the covariance estimate stays available in pcov_log.
# NOTE(review): for b > 0, a*log(b*x) + c == a*log(x) + (a*log(b) + c), so b
# and c only shift the curve together and are not separately identifiable.
# Treat their individual values (and pcov_log) with caution.
popt_log, pcov_log = optimize.curve_fit(
    f=logarithmic_function,
    xdata=df.Day,
    ydata=df.LTV,
)
popt_log
array([0.03291741, 0.01373978, 0.19700252])
# In-sample accuracy of the logarithmic fit over the observed days 1-90.
# The predictions are evaluated once, and the built-in round() replaces the
# ``.round()`` method so the cell works whether scikit-learn hands back NumPy
# scalars or plain Python floats.
log_pred = logarithmic_function(np.arange(1, 91), *popt_log)
print(
    f"R2:{round(r2_score(df.LTV, log_pred), 2)}\n"
    f"MAE:{round(mean_absolute_error(df.LTV, log_pred), 3)}\n"
    f"MAPE:{round(mean_absolute_percentage_error(df.LTV, log_pred) * 100, 2)}%"
)
R2:0.98 MAE:0.003 MAPE:1.99%
import plotly.graph_objects as go

# Observed LTV plotted against the linear and logarithmic fits, both
# extrapolated to day 180.
forecast_days = np.arange(1, 181)  # shared by the x axis and both models

fig = go.Figure()
fig.add_trace(
    go.Scatter(x=df.Day, y=df.LTV, mode='markers', name='Raw Data')
)
fig.add_trace(
    go.Scatter(
        x=forecast_days,
        y=linear_function(forecast_days, *popt_linear),
        mode='lines',
        name='Linear Curve',
    )
)
fig.add_trace(
    go.Scatter(
        x=forecast_days,
        y=logarithmic_function(forecast_days, *popt_log),
        mode='lines',
        name='Log Curve',
    )
)
# Kept as the final expression so a notebook cell still renders the figure.
fig.update_layout(
    plot_bgcolor='rgba(0, 0, 0, 0)',
    paper_bgcolor='rgba(0, 0, 0, 0)',
    # Bold, horizontally centred title; the bold tag is properly closed.
    title=dict(text='<b>LTV curves</b>', x=0.5, y=0.95),
)
# Use scipy's curve_fit to fit the model to the x- and y-axis data.
# curve_fit returns:
# popt_exp: the fitted parameters
# pcov_exp: the estimated covariance of the fitted parameters
def exponential_function(x, a, k, b):
    """Evaluate the exponential curve ``a * exp(-k * x) + b``.

    The exponent is negated, so a positive ``k`` makes the exponential term
    shrink as ``x`` grows and the curve level off towards ``b``.

    Args:
        x: A scalar, or a collection of x values.  Lists, tuples and ranges
            are converted to a float NumPy array first; any other type is
            used as given.
        a: Amplitude of the exponential term.
        k: Rate constant; applied with a negative sign in the exponent.
        b: Constant offset.

    Returns:
        ``a * np.exp(x * -k) + b``.
    """
    # A built-in sequence cannot be multiplied by a plain Python float, so
    # coerce it up front instead of relying on ``k`` being a NumPy scalar.
    if isinstance(x, (list, tuple, range)):
        x = np.asarray(x, dtype=float)
    return a * np.exp(x * -k) + b
# Fit the exponential curve to the daily LTV series and display the fitted
# parameters; the covariance estimate stays available in pcov_exp.
popt_exp, pcov_exp = optimize.curve_fit(
    f=exponential_function,
    xdata=df.Day,
    ydata=df.LTV,
)
popt_exp
array([-0.1202182 , 0.03411894, 0.2089351 ])
# In-sample accuracy of the exponential fit over the observed days 1-90.
# The predictions are evaluated once, and the built-in round() replaces the
# ``.round()`` method so the cell works whether scikit-learn hands back NumPy
# scalars or plain Python floats.
exp_pred = exponential_function(np.arange(1, 91), *popt_exp)
print(
    f"R2:{round(r2_score(df.LTV, exp_pred), 2)}\n"
    f"MAE:{round(mean_absolute_error(df.LTV, exp_pred), 3)}\n"
    f"MAPE:{round(mean_absolute_percentage_error(df.LTV, exp_pred) * 100, 2)}%"
)
import plotly.graph_objects as go

# Observed LTV plotted against all three fitted curves, each extrapolated to
# day 180, with a horizontal legend centred above the plot area.
forecast_days = np.arange(1, 181)  # shared by the x axis and every model

fig = go.Figure()
fig.add_trace(
    go.Scatter(x=df.Day, y=df.LTV, mode='markers', name='Raw Data')
)
fig.add_trace(
    go.Scatter(
        x=forecast_days,
        y=linear_function(forecast_days, *popt_linear),
        mode='lines',
        name='Linear Curve',
    )
)
fig.add_trace(
    go.Scatter(
        x=forecast_days,
        y=logarithmic_function(forecast_days, *popt_log),
        mode='lines',
        name='Log Curve',
    )
)
fig.add_trace(
    go.Scatter(
        x=forecast_days,
        y=exponential_function(forecast_days, *popt_exp),
        mode='lines',
        name='Exp Curve',
    )
)
# One layout call covers background, title and legend.  It is kept as the
# final expression so a notebook cell still renders the figure.
fig.update_layout(
    plot_bgcolor='rgba(0, 0, 0, 0)',
    paper_bgcolor='rgba(0, 0, 0, 0)',
    # Bold, horizontally centred title; the bold tag is properly closed.
    title=dict(text='<b>LTV curves</b>', x=0.5, y=0.95),
    legend=dict(
        orientation="h",
        yanchor="bottom",
        y=1.02,
        xanchor="center",
        x=0.5,
    ),
)
# Side-by-side accuracy of the three fitted curves on the observed days 1-90.
# Each curve is evaluated once and reused for every metric.  The built-in
# round() replaces the ``.round()`` method so the cell works whether
# scikit-learn hands back NumPy scalars or plain Python floats.
observed_days = np.arange(1, 91)
curve_predictions = {
    'Linear': linear_function(observed_days, *popt_linear),
    'Log': logarithmic_function(observed_days, *popt_log),
    'Exp': exponential_function(observed_days, *popt_exp),
}
# Kept as the final expression so a notebook cell still renders the table.
pd.DataFrame(
    {
        "Curve": list(curve_predictions),
        "R2": [
            round(r2_score(df.LTV, pred), 4)
            for pred in curve_predictions.values()
        ],
        "MAE": [
            round(mean_absolute_error(df.LTV, pred), 4)
            for pred in curve_predictions.values()
        ],
        # MAPE is reported as a percentage.
        "MAPE": [
            round(mean_absolute_percentage_error(df.LTV, pred) * 100, 2)
            for pred in curve_predictions.values()
        ],
    }
)
| Curve | R2 | MAE | MAPE | |
|---|---|---|---|---|
| 0 | Linear | 0.8789 | 0.0082 | 5.6100 |
| 1 | Log | 0.9799 | 0.0028 | 1.9900 |
| 2 | Exp | 0.9897 | 0.0026 | 1.6200 |